scprint
  • Home
  • structure
  • training
  • GRN inference
  • cell embedding and classification
  • gene embeddings

documentation

  • utils
  • model
  • loaders
  • tasks
scprint
  • cell embedding and classification

In [1]:
Copied!
## import
import lamindb as ln
import scanpy as sc

from scprint import scPrint

from scdataloader.data import SimpleAnnDataset
from scdataloader import Preprocessor, Collator

from torch.utils.data import DataLoader

from scdataloader.utils import load_genes

import numpy as np
import anndata as ad
from scipy.sparse import csr_matrix
import pandas as pd

from scib_metrics.benchmark import Benchmarker

from lightning.pytorch import Trainer

# auto-reload edited local modules so changes to scprint/scdataloader
# are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

import torch
# allow TF32 matmuls: trades a little float32 precision for speed on recent GPUs
torch.set_float32_matmul_precision('medium')
## import import lamindb as ln import scanpy as sc from scprint import scPrint from scdataloader.data import SimpleAnnDataset from scdataloader import Preprocessor, Collator from torch.utils.data import DataLoader from scdataloader.utils import load_genes import numpy as np import anndata as ad from scipy.sparse import csr_matrix import pandas as pd from scib_metrics.benchmark import Benchmarker from lightning.pytorch import Trainer %load_ext autoreload %autoreload 2 import torch torch.set_float32_matmul_precision('medium')
💡 lamindb instance: jkobject/scdataloader
2024-02-27 15:37:24,549:INFO - Created a temporary directory at /tmp/tmpn_kldnkg
2024-02-27 15:37:24,551:INFO - Writing /tmp/tmpn_kldnkg/_remote_module_non_scriptable.py
2024-02-27 15:37:24,551:INFO - Writing /tmp/tmpn_kldnkg/_remote_module_non_scriptable.py

Preparing the model

In [48]:
Copied!
model.gene_encoder.embedding.weight
model.gene_encoder.embedding.weight
Out[48]:
Parameter containing:
tensor([[ 0.0037,  0.0200, -0.0034,  ...,  0.0202,  0.0345, -0.0313],
        [-0.0007,  0.0393, -0.0069,  ...,  0.0281,  0.0400, -0.0190],
        [-0.0175,  0.0621, -0.0132,  ...,  0.0161,  0.0288, -0.0111],
        ...,
        [-0.0217,  0.0621, -0.0146,  ...,  0.0505,  0.0368, -0.0321],
        [ 0.0173,  0.0463, -0.0083,  ...,  0.0037,  0.0115, -0.0153],
        [ 0.0525,  0.0525, -0.0402,  ...,  0.0067,  0.0007, -0.0331]],
       device='cuda:0')
In [61]:
Copied!
model.gene_encoder.embedding.weight[-5]
model.gene_encoder.embedding.weight[-5]
Out[61]:
tensor([-2.4019e-02, -1.1195e-02,  1.5377e-02, -1.5292e-02, -3.8597e-03,
         8.3640e-04, -2.7142e-02,  1.9008e-02, -8.6371e-03, -1.5238e-02,
        -1.6833e-02,  1.7925e-02, -2.9785e-03, -1.7331e-02,  1.6175e-02,
         2.5416e-03,  2.2125e-02, -6.5159e-03,  6.1229e-03, -8.2164e-04,
         1.5580e-03, -1.4043e-03,  5.7557e-03,  1.7301e-02,  1.7752e-02,
        -6.6907e-03,  2.1567e-02,  2.5564e-02, -2.2414e-03,  2.4870e-03,
        -7.5531e-03, -3.0830e-02, -7.8189e-03,  3.9240e-03, -1.8739e-02,
        -1.1813e-02, -5.8855e-03,  1.1147e-02,  1.1393e-02, -1.3492e-03,
        -1.2266e-02, -1.1268e-02,  1.9579e-02, -1.2649e-03,  3.9618e-02,
         8.5675e-03,  6.7224e-03,  2.3654e-02,  3.3809e-02, -6.0911e-04,
        -4.3977e-03, -3.0629e-03, -3.3121e-02,  1.3739e-02, -1.9411e-02,
         1.8377e-02,  4.3772e-03, -3.6158e-02,  3.2731e-03,  2.9397e-02,
        -8.0602e-03,  1.9550e-02, -9.7988e-03, -1.3292e-02, -6.1542e-03,
        -4.6554e-03, -4.3562e-03, -7.5146e-03,  1.5390e-02,  3.6237e-02,
         3.9053e-02,  4.0985e-03,  2.9162e-02,  5.7538e-03,  5.5284e-03,
        -2.5665e-02,  2.4280e-02,  8.1235e-03, -2.2968e-02,  7.5519e-03,
         7.4942e-03,  1.1842e-02,  2.1870e-02, -3.1771e-03,  1.5563e-02,
         2.4190e-02, -5.0837e-02, -6.7125e-03, -2.7193e-02,  1.2300e-02,
         1.9444e-02, -3.5305e-03,  4.0222e-02,  1.3722e-02,  1.2304e-02,
         5.9529e-03,  9.8232e-03,  1.6579e-02, -1.2398e-02,  6.0710e-06,
         2.0671e-02,  3.6629e-03, -1.4828e-02, -4.6302e-03,  4.1376e-02,
        -1.6236e-02,  6.6806e-03,  2.6937e-02,  3.4397e-02, -1.3208e-02,
         2.1328e-03, -2.2912e-02,  4.8714e-02, -1.6899e-03, -3.5980e-02,
        -2.0907e-03,  3.5940e-02,  2.6971e-02, -1.5465e-02,  1.8554e-02,
        -2.8039e-02, -2.6164e-02,  3.3551e-02, -3.4469e-02,  4.4606e-02,
        -7.3048e-03,  1.6316e-02,  3.2521e-02], device='cuda:0')
In [118]:
Copied!
# Load precomputed gene embeddings (parquet indexed by gene id) and reorder
# rows to the model's gene vocabulary. NOTE(review): .loc raises a KeyError if
# any model gene is missing from the parquet index — assumes full coverage.
embeddings = pd.read_parquet('../../data/temp/embeddings.parquet').loc[model.genes]
# Adaptive average pooling over the last axis compresses each embedding row
# down to 128 values, matching the model's gene-embedding width.
sembeddings = torch.nn.AdaptiveAvgPool1d(128)(
    torch.tensor(embeddings.values)
)
# (kept disabled) would overwrite the trained gene-embedding weights in place:
#model.gene_encoder.embedding.weight.data.copy_(torch.Tensor(sembeddings))
embeddings = pd.read_parquet('../../data/temp/embeddings.parquet').loc[model.genes] sembeddings = torch.nn.AdaptiveAvgPool1d(128)( torch.tensor(embeddings.values) ) #model.gene_encoder.embedding.weight.data.copy_(torch.Tensor(sembeddings))
In [120]:
Copied!
model.genes
model.genes
Out[120]:
['ENSG00000000003',
 'ENSG00000000005',
 'ENSG00000000419',
 'ENSG00000000457',
 'ENSG00000000460',
 'ENSG00000000938',
 'ENSG00000000971',
 'ENSG00000001036',
 'ENSG00000001084',
 'ENSG00000001167',
 'ENSG00000001460',
 'ENSG00000001461',
 'ENSG00000001497',
 'ENSG00000001561',
 'ENSG00000001617',
 'ENSG00000001626',
 'ENSG00000001629',
 'ENSG00000001630',
 'ENSG00000001631',
 'ENSG00000002016',
 'ENSG00000002330',
 'ENSG00000002549',
 'ENSG00000002586',
 'ENSG00000002587',
 'ENSG00000002726',
 'ENSG00000002745',
 'ENSG00000002746',
 'ENSG00000002822',
 'ENSG00000002834',
 'ENSG00000002919',
 'ENSG00000002933',
 'ENSG00000003056',
 'ENSG00000003096',
 'ENSG00000003137',
 'ENSG00000003147',
 'ENSG00000003249',
 'ENSG00000003393',
 'ENSG00000003400',
 'ENSG00000003402',
 'ENSG00000003436',
 'ENSG00000003509',
 'ENSG00000003756',
 'ENSG00000003987',
 'ENSG00000003989',
 'ENSG00000004059',
 'ENSG00000004139',
 'ENSG00000004142',
 'ENSG00000004399',
 'ENSG00000004455',
 'ENSG00000004468',
 'ENSG00000004478',
 'ENSG00000004487',
 'ENSG00000004534',
 'ENSG00000004660',
 'ENSG00000004700',
 'ENSG00000004766',
 'ENSG00000004776',
 'ENSG00000004777',
 'ENSG00000004779',
 'ENSG00000004799',
 'ENSG00000004809',
 'ENSG00000004838',
 'ENSG00000004846',
 'ENSG00000004848',
 'ENSG00000004864',
 'ENSG00000004866',
 'ENSG00000004897',
 'ENSG00000004939',
 'ENSG00000004948',
 'ENSG00000004961',
 'ENSG00000004975',
 'ENSG00000005001',
 'ENSG00000005007',
 'ENSG00000005020',
 'ENSG00000005022',
 'ENSG00000005059',
 'ENSG00000005073',
 'ENSG00000005075',
 'ENSG00000005100',
 'ENSG00000005102',
 'ENSG00000005108',
 'ENSG00000005156',
 'ENSG00000005175',
 'ENSG00000005187',
 'ENSG00000005189',
 'ENSG00000005194',
 'ENSG00000005206',
 'ENSG00000005238',
 'ENSG00000005243',
 'ENSG00000005249',
 'ENSG00000005302',
 'ENSG00000005339',
 'ENSG00000005379',
 'ENSG00000005381',
 'ENSG00000005421',
 'ENSG00000005436',
 'ENSG00000005448',
 'ENSG00000005469',
 'ENSG00000005471',
 'ENSG00000005483',
 'ENSG00000005486',
 'ENSG00000005513',
 'ENSG00000005700',
 'ENSG00000005801',
 'ENSG00000005810',
 'ENSG00000005812',
 'ENSG00000005844',
 'ENSG00000005882',
 'ENSG00000005884',
 'ENSG00000005889',
 'ENSG00000005893',
 'ENSG00000005961',
 'ENSG00000005981',
 'ENSG00000006007',
 'ENSG00000006015',
 'ENSG00000006016',
 'ENSG00000006025',
 'ENSG00000006042',
 'ENSG00000006047',
 'ENSG00000006059',
 'ENSG00000006062',
 'ENSG00000006071',
 'ENSG00000006116',
 'ENSG00000006118',
 'ENSG00000006125',
 'ENSG00000006128',
 'ENSG00000006194',
 'ENSG00000006210',
 'ENSG00000006282',
 'ENSG00000006283',
 'ENSG00000006327',
 'ENSG00000006377',
 'ENSG00000006432',
 'ENSG00000006451',
 'ENSG00000006453',
 'ENSG00000006459',
 'ENSG00000006468',
 'ENSG00000006530',
 'ENSG00000006534',
 'ENSG00000006555',
 'ENSG00000006576',
 'ENSG00000006606',
 'ENSG00000006607',
 'ENSG00000006611',
 'ENSG00000006625',
 'ENSG00000006634',
 'ENSG00000006638',
 'ENSG00000006652',
 'ENSG00000006659',
 'ENSG00000006695',
 'ENSG00000006704',
 'ENSG00000006712',
 'ENSG00000006715',
 'ENSG00000006740',
 'ENSG00000006744',
 'ENSG00000006747',
 'ENSG00000006756',
 'ENSG00000006757',
 'ENSG00000006788',
 'ENSG00000006831',
 'ENSG00000006837',
 'ENSG00000007001',
 'ENSG00000007038',
 'ENSG00000007047',
 'ENSG00000007062',
 'ENSG00000007080',
 'ENSG00000007129',
 'ENSG00000007168',
 'ENSG00000007171',
 'ENSG00000007174',
 'ENSG00000007202',
 'ENSG00000007216',
 'ENSG00000007237',
 'ENSG00000007255',
 'ENSG00000007264',
 'ENSG00000007306',
 'ENSG00000007312',
 'ENSG00000007314',
 'ENSG00000007341',
 'ENSG00000007350',
 'ENSG00000007372',
 'ENSG00000007376',
 'ENSG00000007384',
 'ENSG00000007392',
 'ENSG00000007402',
 'ENSG00000007516',
 'ENSG00000007520',
 'ENSG00000007541',
 'ENSG00000007545',
 'ENSG00000007866',
 'ENSG00000007908',
 'ENSG00000007923',
 'ENSG00000007933',
 'ENSG00000007944',
 'ENSG00000007952',
 'ENSG00000007968',
 'ENSG00000008018',
 'ENSG00000008056',
 'ENSG00000008083',
 'ENSG00000008086',
 'ENSG00000008118',
 'ENSG00000008128',
 'ENSG00000008130',
 'ENSG00000008196',
 'ENSG00000008197',
 'ENSG00000008226',
 'ENSG00000008256',
 'ENSG00000008277',
 'ENSG00000008282',
 'ENSG00000008283',
 'ENSG00000008294',
 'ENSG00000008300',
 'ENSG00000008311',
 'ENSG00000008323',
 'ENSG00000008324',
 'ENSG00000008382',
 'ENSG00000008394',
 'ENSG00000008405',
 'ENSG00000008438',
 'ENSG00000008441',
 'ENSG00000008513',
 'ENSG00000008516',
 'ENSG00000008517',
 'ENSG00000008710',
 'ENSG00000008735',
 'ENSG00000008838',
 'ENSG00000008853',
 'ENSG00000008869',
 'ENSG00000008952',
 'ENSG00000008988',
 'ENSG00000009307',
 'ENSG00000009335',
 'ENSG00000009413',
 'ENSG00000009694',
 'ENSG00000009709',
 'ENSG00000009724',
 'ENSG00000009765',
 'ENSG00000009780',
 'ENSG00000009790',
 'ENSG00000009830',
 'ENSG00000009844',
 'ENSG00000009950',
 'ENSG00000009954',
 'ENSG00000010017',
 'ENSG00000010030',
 'ENSG00000010072',
 'ENSG00000010165',
 'ENSG00000010219',
 'ENSG00000010244',
 'ENSG00000010256',
 'ENSG00000010270',
 'ENSG00000010278',
 'ENSG00000010282',
 'ENSG00000010292',
 'ENSG00000010295',
 'ENSG00000010310',
 'ENSG00000010318',
 'ENSG00000010319',
 'ENSG00000010322',
 'ENSG00000010327',
 'ENSG00000010361',
 'ENSG00000010379',
 'ENSG00000010404',
 'ENSG00000010438',
 'ENSG00000010539',
 'ENSG00000010610',
 'ENSG00000010626',
 'ENSG00000010671',
 'ENSG00000010704',
 'ENSG00000010803',
 'ENSG00000010810',
 'ENSG00000010818',
 'ENSG00000010932',
 'ENSG00000011007',
 'ENSG00000011009',
 'ENSG00000011021',
 'ENSG00000011028',
 'ENSG00000011052',
 'ENSG00000011083',
 'ENSG00000011105',
 'ENSG00000011114',
 'ENSG00000011132',
 'ENSG00000011143',
 'ENSG00000011198',
 'ENSG00000011201',
 'ENSG00000011243',
 'ENSG00000011258',
 'ENSG00000011260',
 'ENSG00000011275',
 'ENSG00000011295',
 'ENSG00000011304',
 'ENSG00000011332',
 'ENSG00000011347',
 'ENSG00000011376',
 'ENSG00000011405',
 'ENSG00000011422',
 'ENSG00000011426',
 'ENSG00000011451',
 'ENSG00000011454',
 'ENSG00000011465',
 'ENSG00000011478',
 'ENSG00000011485',
 'ENSG00000011523',
 'ENSG00000011566',
 'ENSG00000011590',
 'ENSG00000011600',
 'ENSG00000011638',
 'ENSG00000011677',
 'ENSG00000012048',
 'ENSG00000012061',
 'ENSG00000012124',
 'ENSG00000012171',
 'ENSG00000012174',
 'ENSG00000012211',
 'ENSG00000012223',
 'ENSG00000012232',
 'ENSG00000012504',
 'ENSG00000012660',
 'ENSG00000012779',
 'ENSG00000012817',
 'ENSG00000012822',
 'ENSG00000012963',
 'ENSG00000012983',
 'ENSG00000013016',
 'ENSG00000013275',
 'ENSG00000013288',
 'ENSG00000013293',
 'ENSG00000013297',
 'ENSG00000013306',
 'ENSG00000013364',
 'ENSG00000013374',
 'ENSG00000013375',
 'ENSG00000013392',
 'ENSG00000013441',
 'ENSG00000013503',
 'ENSG00000013523',
 'ENSG00000013561',
 'ENSG00000013563',
 'ENSG00000013573',
 'ENSG00000013583',
 'ENSG00000013588',
 'ENSG00000013619',
 'ENSG00000013725',
 'ENSG00000013810',
 'ENSG00000014123',
 'ENSG00000014138',
 'ENSG00000014164',
 'ENSG00000014216',
 'ENSG00000014257',
 'ENSG00000014641',
 'ENSG00000014824',
 'ENSG00000014914',
 'ENSG00000014919',
 'ENSG00000015133',
 'ENSG00000015153',
 'ENSG00000015171',
 'ENSG00000015285',
 'ENSG00000015413',
 'ENSG00000015475',
 'ENSG00000015479',
 'ENSG00000015520',
 'ENSG00000015532',
 'ENSG00000015568',
 'ENSG00000015592',
 'ENSG00000015676',
 'ENSG00000016082',
 'ENSG00000016391',
 'ENSG00000016402',
 'ENSG00000016490',
 'ENSG00000016602',
 'ENSG00000016864',
 'ENSG00000017260',
 'ENSG00000017427',
 'ENSG00000017483',
 'ENSG00000017797',
 'ENSG00000018189',
 'ENSG00000018236',
 'ENSG00000018280',
 'ENSG00000018408',
 'ENSG00000018510',
 'ENSG00000018610',
 'ENSG00000018625',
 'ENSG00000018699',
 'ENSG00000018869',
 'ENSG00000019102',
 'ENSG00000019144',
 'ENSG00000019169',
 'ENSG00000019186',
 'ENSG00000019485',
 'ENSG00000019505',
 'ENSG00000019549',
 'ENSG00000019582',
 'ENSG00000019991',
 'ENSG00000019995',
 'ENSG00000020129',
 'ENSG00000020181',
 'ENSG00000020256',
 'ENSG00000020426',
 'ENSG00000020577',
 'ENSG00000020633',
 'ENSG00000020922',
 'ENSG00000021300',
 'ENSG00000021355',
 'ENSG00000021461',
 'ENSG00000021488',
 'ENSG00000021574',
 'ENSG00000021645',
 'ENSG00000021762',
 'ENSG00000021776',
 'ENSG00000021826',
 'ENSG00000021852',
 'ENSG00000022267',
 'ENSG00000022277',
 'ENSG00000022355',
 'ENSG00000022556',
 'ENSG00000022567',
 'ENSG00000022840',
 'ENSG00000022976',
 'ENSG00000023041',
 'ENSG00000023171',
 'ENSG00000023191',
 'ENSG00000023228',
 'ENSG00000023287',
 'ENSG00000023318',
 'ENSG00000023330',
 'ENSG00000023445',
 'ENSG00000023516',
 'ENSG00000023572',
 'ENSG00000023608',
 'ENSG00000023697',
 'ENSG00000023734',
 'ENSG00000023839',
 'ENSG00000023892',
 'ENSG00000023902',
 'ENSG00000023909',
 'ENSG00000024048',
 'ENSG00000024422',
 'ENSG00000024526',
 'ENSG00000024862',
 'ENSG00000025039',
 'ENSG00000025156',
 'ENSG00000025293',
 'ENSG00000025423',
 'ENSG00000025434',
 'ENSG00000025708',
 'ENSG00000025770',
 'ENSG00000025772',
 'ENSG00000025796',
 'ENSG00000025800',
 'ENSG00000026025',
 'ENSG00000026036',
 'ENSG00000026103',
 'ENSG00000026297',
 'ENSG00000026508',
 'ENSG00000026559',
 'ENSG00000026652',
 'ENSG00000026751',
 'ENSG00000026950',
 'ENSG00000027001',
 'ENSG00000027075',
 'ENSG00000027644',
 'ENSG00000027697',
 'ENSG00000027847',
 'ENSG00000027869',
 'ENSG00000028116',
 'ENSG00000028137',
 'ENSG00000028203',
 'ENSG00000028277',
 'ENSG00000028310',
 'ENSG00000028528',
 'ENSG00000028839',
 'ENSG00000029153',
 'ENSG00000029363',
 'ENSG00000029364',
 'ENSG00000029534',
 'ENSG00000029559',
 'ENSG00000029639',
 'ENSG00000029725',
 'ENSG00000029993',
 'ENSG00000030066',
 'ENSG00000030110',
 'ENSG00000030304',
 'ENSG00000030419',
 'ENSG00000030582',
 'ENSG00000031003',
 'ENSG00000031081',
 'ENSG00000031691',
 'ENSG00000031698',
 'ENSG00000031823',
 'ENSG00000032219',
 'ENSG00000032389',
 'ENSG00000032444',
 'ENSG00000032742',
 'ENSG00000033011',
 'ENSG00000033030',
 'ENSG00000033050',
 'ENSG00000033100',
 'ENSG00000033122',
 'ENSG00000033170',
 'ENSG00000033178',
 'ENSG00000033327',
 'ENSG00000033627',
 'ENSG00000033800',
 'ENSG00000033867',
 'ENSG00000034053',
 'ENSG00000034152',
 'ENSG00000034239',
 'ENSG00000034510',
 'ENSG00000034533',
 'ENSG00000034677',
 'ENSG00000034693',
 'ENSG00000034713',
 'ENSG00000034971',
 'ENSG00000035115',
 'ENSG00000035141',
 'ENSG00000035403',
 'ENSG00000035499',
 'ENSG00000035664',
 'ENSG00000035681',
 'ENSG00000035687',
 'ENSG00000035720',
 'ENSG00000035862',
 'ENSG00000035928',
 'ENSG00000036054',
 'ENSG00000036257',
 'ENSG00000036448',
 'ENSG00000036473',
 'ENSG00000036530',
 'ENSG00000036549',
 'ENSG00000036565',
 'ENSG00000036672',
 'ENSG00000036828',
 'ENSG00000037042',
 'ENSG00000037241',
 'ENSG00000037280',
 'ENSG00000037474',
 'ENSG00000037637',
 'ENSG00000037749',
 'ENSG00000037757',
 'ENSG00000037897',
 'ENSG00000037965',
 'ENSG00000038002',
 'ENSG00000038210',
 'ENSG00000038219',
 'ENSG00000038274',
 'ENSG00000038295',
 'ENSG00000038358',
 'ENSG00000038382',
 'ENSG00000038427',
 'ENSG00000038532',
 'ENSG00000038945',
 'ENSG00000039068',
 'ENSG00000039123',
 'ENSG00000039139',
 'ENSG00000039319',
 'ENSG00000039523',
 'ENSG00000039537',
 'ENSG00000039560',
 'ENSG00000039600',
 'ENSG00000039650',
 'ENSG00000039987',
 'ENSG00000040199',
 'ENSG00000040275',
 'ENSG00000040341',
 'ENSG00000040487',
 'ENSG00000040531',
 'ENSG00000040608',
 'ENSG00000040633',
 'ENSG00000040731',
 'ENSG00000040933',
 'ENSG00000041353',
 'ENSG00000041357',
 'ENSG00000041515',
 'ENSG00000041802',
 'ENSG00000041880',
 'ENSG00000041982',
 'ENSG00000041988',
 'ENSG00000042062',
 'ENSG00000042088',
 'ENSG00000042286',
 'ENSG00000042317',
 'ENSG00000042429',
 'ENSG00000042445',
 'ENSG00000042493',
 'ENSG00000042753',
 'ENSG00000042781',
 'ENSG00000042813',
 'ENSG00000042832',
 'ENSG00000042980',
 'ENSG00000043039',
 'ENSG00000043093',
 'ENSG00000043143',
 'ENSG00000043355',
 'ENSG00000043462',
 'ENSG00000043514',
 'ENSG00000043591',
 'ENSG00000044012',
 'ENSG00000044090',
 'ENSG00000044115',
 'ENSG00000044446',
 'ENSG00000044459',
 'ENSG00000044524',
 'ENSG00000044574',
 'ENSG00000046604',
 'ENSG00000046647',
 'ENSG00000046651',
 'ENSG00000046653',
 'ENSG00000046774',
 'ENSG00000046889',
 'ENSG00000047056',
 'ENSG00000047188',
 'ENSG00000047230',
 'ENSG00000047249',
 'ENSG00000047315',
 'ENSG00000047346',
 'ENSG00000047365',
 'ENSG00000047410',
 'ENSG00000047457',
 'ENSG00000047578',
 'ENSG00000047579',
 'ENSG00000047597',
 'ENSG00000047617',
 'ENSG00000047621',
 'ENSG00000047634',
 'ENSG00000047644',
 'ENSG00000047648',
 'ENSG00000047662',
 'ENSG00000047849',
 'ENSG00000047932',
 'ENSG00000047936',
 'ENSG00000048028',
 'ENSG00000048052',
 'ENSG00000048140',
 'ENSG00000048162',
 'ENSG00000048342',
 'ENSG00000048392',
 'ENSG00000048405',
 'ENSG00000048462',
 'ENSG00000048471',
 'ENSG00000048540',
 'ENSG00000048544',
 'ENSG00000048545',
 'ENSG00000048649',
 'ENSG00000048707',
 'ENSG00000048740',
 'ENSG00000048828',
 'ENSG00000048991',
 'ENSG00000049089',
 'ENSG00000049130',
 'ENSG00000049167',
 'ENSG00000049192',
 'ENSG00000049239',
 'ENSG00000049245',
 'ENSG00000049246',
 'ENSG00000049247',
 'ENSG00000049249',
 'ENSG00000049283',
 'ENSG00000049323',
 'ENSG00000049449',
 'ENSG00000049540',
 'ENSG00000049541',
 'ENSG00000049618',
 'ENSG00000049656',
 'ENSG00000049759',
 'ENSG00000049768',
 'ENSG00000049769',
 'ENSG00000049860',
 'ENSG00000049883',
 'ENSG00000050030',
 'ENSG00000050130',
 'ENSG00000050165',
 'ENSG00000050327',
 'ENSG00000050344',
 'ENSG00000050393',
 'ENSG00000050405',
 'ENSG00000050426',
 'ENSG00000050438',
 'ENSG00000050555',
 'ENSG00000050628',
 'ENSG00000050730',
 'ENSG00000050748',
 'ENSG00000050767',
 'ENSG00000050820',
 'ENSG00000051009',
 'ENSG00000051108',
 'ENSG00000051128',
 'ENSG00000051180',
 'ENSG00000051341',
 'ENSG00000051382',
 'ENSG00000051523',
 'ENSG00000051596',
 'ENSG00000051620',
 'ENSG00000051825',
 'ENSG00000052126',
 'ENSG00000052344',
 'ENSG00000052723',
 'ENSG00000052749',
 'ENSG00000052795',
 'ENSG00000052802',
 'ENSG00000052841',
 'ENSG00000052850',
 'ENSG00000053108',
 'ENSG00000053254',
 'ENSG00000053328',
 'ENSG00000053371',
 'ENSG00000053372',
 'ENSG00000053438',
 'ENSG00000053501',
 'ENSG00000053524',
 'ENSG00000053702',
 'ENSG00000053747',
 'ENSG00000053770',
 'ENSG00000053900',
 'ENSG00000053918',
 'ENSG00000054116',
 'ENSG00000054118',
 'ENSG00000054148',
 'ENSG00000054179',
 'ENSG00000054219',
 'ENSG00000054267',
 'ENSG00000054277',
 'ENSG00000054282',
 'ENSG00000054356',
 'ENSG00000054392',
 'ENSG00000054523',
 'ENSG00000054598',
 'ENSG00000054611',
 'ENSG00000054654',
 'ENSG00000054690',
 'ENSG00000054793',
 'ENSG00000054796',
 'ENSG00000054803',
 'ENSG00000054938',
 'ENSG00000054965',
 'ENSG00000054967',
 'ENSG00000054983',
 'ENSG00000055044',
 'ENSG00000055070',
 'ENSG00000055118',
 'ENSG00000055130',
 'ENSG00000055147',
 'ENSG00000055163',
 'ENSG00000055208',
 'ENSG00000055211',
 'ENSG00000055332',
 'ENSG00000055483',
 'ENSG00000055609',
 'ENSG00000055732',
 'ENSG00000055813',
 'ENSG00000055917',
 'ENSG00000055950',
 'ENSG00000055955',
 'ENSG00000055957',
 'ENSG00000056050',
 'ENSG00000056097',
 'ENSG00000056277',
 'ENSG00000056291',
 'ENSG00000056487',
 'ENSG00000056558',
 'ENSG00000056586',
 'ENSG00000056678',
 'ENSG00000056736',
 'ENSG00000056972',
 'ENSG00000056998',
 'ENSG00000057019',
 'ENSG00000057149',
 'ENSG00000057252',
 'ENSG00000057294',
 'ENSG00000057468',
 'ENSG00000057593',
 'ENSG00000057608',
 'ENSG00000057657',
 'ENSG00000057663',
 'ENSG00000057704',
 'ENSG00000057757',
 'ENSG00000057935',
 'ENSG00000058056',
 'ENSG00000058063',
 'ENSG00000058085',
 'ENSG00000058091',
 'ENSG00000058262',
 'ENSG00000058272',
 'ENSG00000058335',
 'ENSG00000058404',
 'ENSG00000058453',
 'ENSG00000058600',
 'ENSG00000058668',
 'ENSG00000058673',
 'ENSG00000058729',
 'ENSG00000058799',
 'ENSG00000058804',
 'ENSG00000058866',
 'ENSG00000059122',
 'ENSG00000059145',
 'ENSG00000059377',
 'ENSG00000059378',
 'ENSG00000059573',
 'ENSG00000059588',
 'ENSG00000059691',
 'ENSG00000059728',
 'ENSG00000059758',
 'ENSG00000059769',
 'ENSG00000059804',
 'ENSG00000059915',
 'ENSG00000060069',
 'ENSG00000060138',
 'ENSG00000060140',
 'ENSG00000060237',
 'ENSG00000060339',
 'ENSG00000060491',
 'ENSG00000060558',
 'ENSG00000060566',
 'ENSG00000060642',
 'ENSG00000060656',
 'ENSG00000060688',
 'ENSG00000060709',
 'ENSG00000060718',
 'ENSG00000060749',
 'ENSG00000060762',
 'ENSG00000060971',
 'ENSG00000060982',
 'ENSG00000061273',
 'ENSG00000061337',
 'ENSG00000061455',
 'ENSG00000061492',
 'ENSG00000061656',
 'ENSG00000061676',
 'ENSG00000061794',
 'ENSG00000061918',
 'ENSG00000061936',
 'ENSG00000061938',
 'ENSG00000061987',
 'ENSG00000062038',
 'ENSG00000062096',
 'ENSG00000062194',
 'ENSG00000062282',
 'ENSG00000062370',
 'ENSG00000062485',
 'ENSG00000062524',
 'ENSG00000062582',
 'ENSG00000062598',
 'ENSG00000062650',
 'ENSG00000062716',
 'ENSG00000062725',
 'ENSG00000062822',
 'ENSG00000063015',
 'ENSG00000063046',
 'ENSG00000063127',
 'ENSG00000063169',
 'ENSG00000063176',
 'ENSG00000063177',
 'ENSG00000063180',
 'ENSG00000063241',
 'ENSG00000063244',
 'ENSG00000063245',
 'ENSG00000063322',
 'ENSG00000063438',
 'ENSG00000063515',
 'ENSG00000063587',
 'ENSG00000063601',
 'ENSG00000063660',
 'ENSG00000063761',
 'ENSG00000063854',
 'ENSG00000063978',
 'ENSG00000064012',
 'ENSG00000064042',
 'ENSG00000064102',
 'ENSG00000064115',
 'ENSG00000064195',
 'ENSG00000064199',
 'ENSG00000064201',
 'ENSG00000064205',
 'ENSG00000064218',
 'ENSG00000064225',
 'ENSG00000064270',
 'ENSG00000064300',
 'ENSG00000064309',
 'ENSG00000064313',
 'ENSG00000064393',
 'ENSG00000064419',
 'ENSG00000064489',
 'ENSG00000064490',
 'ENSG00000064545',
 'ENSG00000064547',
 'ENSG00000064601',
 'ENSG00000064607',
 'ENSG00000064651',
 'ENSG00000064652',
 'ENSG00000064655',
 'ENSG00000064666',
 'ENSG00000064687',
 'ENSG00000064692',
 'ENSG00000064703',
 'ENSG00000064726',
 'ENSG00000064763',
 'ENSG00000064787',
 'ENSG00000064835',
 'ENSG00000064886',
 'ENSG00000064932',
 'ENSG00000064933',
 'ENSG00000064961',
 'ENSG00000064989',
 'ENSG00000064995',
 'ENSG00000064999',
 'ENSG00000065000',
 'ENSG00000065029',
 'ENSG00000065054',
 'ENSG00000065057',
 'ENSG00000065060',
 'ENSG00000065135',
 'ENSG00000065150',
 'ENSG00000065154',
 'ENSG00000065183',
 'ENSG00000065243',
 'ENSG00000065268',
 'ENSG00000065308',
 'ENSG00000065320',
 'ENSG00000065325',
 'ENSG00000065328',
 'ENSG00000065357',
 'ENSG00000065361',
 'ENSG00000065371',
 'ENSG00000065413',
 'ENSG00000065427',
 'ENSG00000065457',
 'ENSG00000065485',
 'ENSG00000065491',
 'ENSG00000065518',
 'ENSG00000065526',
 'ENSG00000065534',
 'ENSG00000065548',
 'ENSG00000065559',
 'ENSG00000065600',
 'ENSG00000065609',
 'ENSG00000065613',
 'ENSG00000065615',
 'ENSG00000065618',
 'ENSG00000065621',
 'ENSG00000065665',
 'ENSG00000065675',
 'ENSG00000065717',
 'ENSG00000065802',
 'ENSG00000065809',
 'ENSG00000065833',
 'ENSG00000065882',
 'ENSG00000065883',
 'ENSG00000065911',
 'ENSG00000065923',
 'ENSG00000065970',
 'ENSG00000065978',
 'ENSG00000065989',
 'ENSG00000066027',
 'ENSG00000066032',
 'ENSG00000066044',
 'ENSG00000066056',
 'ENSG00000066084',
 'ENSG00000066117',
 'ENSG00000066135',
 'ENSG00000066136',
 'ENSG00000066185',
 'ENSG00000066230',
 'ENSG00000066248',
 'ENSG00000066279',
 'ENSG00000066294',
 'ENSG00000066322',
 'ENSG00000066336',
 'ENSG00000066379',
 'ENSG00000066382',
 'ENSG00000066405',
 'ENSG00000066422',
 'ENSG00000066427',
 'ENSG00000066455',
 'ENSG00000066468',
 'ENSG00000066557',
 'ENSG00000066583',
 'ENSG00000066629',
 'ENSG00000066651',
 'ENSG00000066654',
 'ENSG00000066697',
 'ENSG00000066735',
 'ENSG00000066739',
 'ENSG00000066777',
 'ENSG00000066813',
 'ENSG00000066827',
 'ENSG00000066855',
 'ENSG00000066923',
 'ENSG00000066926',
 'ENSG00000066933',
 'ENSG00000067048',
 'ENSG00000067057',
 'ENSG00000067064',
 'ENSG00000067066',
 'ENSG00000067082',
 'ENSG00000067113',
 'ENSG00000067141',
 'ENSG00000067167',
 'ENSG00000067177',
 'ENSG00000067182',
 'ENSG00000067191',
 'ENSG00000067208',
 ...]
In [2]:
Copied!
# Load the raw Lightning checkpoint dict for inspection.
# NOTE(review): torch.load unpickles arbitrary objects — only load checkpoints
# from trusted sources (consider weights_only=True if only tensors are needed).
check = torch.load('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt')
check = torch.load('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt')
In [ ]:
Copied!
check
check
In [70]:
Copied!
# Sanity check: print the mean of each transformer block's first MLP weight
# matrix (a cheap way to see the loaded weights differ per layer).
for i in model.transformer.blocks:
    print(i.mlp.fc1.weight.mean())
for i in model.transformer.blocks: print(i.mlp.fc1.weight.mean())
tensor(0.0010, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(-0.0005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(-0.0033, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(-0.0056, device='cuda:0', grad_fn=<MeanBackward0>)

tensor(-0.0005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(-0.0033, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(-0.0056, device='cuda:0', grad_fn=<MeanBackward0>)
In [87]:
Copied!
"gene_encoder.embedding.weight"
"gene_encoder.embedding.weight"
Out[87]:
'gene_encoder.embedding.weight'
In [72]:
Copied!
check.keys()
check.keys()
Out[72]:
dict_keys(['epoch', 'global_step', 'pytorch-lightning_version', 'state_dict', 'loops', 'callbacks', 'optimizer_states', 'lr_schedulers', 'MixedPrecisionPlugin', 'hparams_name', 'hyper_parameters'])
In [81]:
Copied!
check['state_dict']["gene_encoder.embedding.weight"][20]
check['state_dict']["gene_encoder.embedding.weight"][20]
Out[81]:
tensor([-0.0049, -0.0209,  0.0067, -0.0165, -0.0013, -0.0163,  0.0099,  0.0014,
         0.0046,  0.0236,  0.0029,  0.0037, -0.0082,  0.0207,  0.0136,  0.0052,
         0.0035, -0.0023,  0.0339,  0.0160, -0.0114,  0.0137,  0.0289,  0.0159,
        -0.0129,  0.0246, -0.0119, -0.0132,  0.0026,  0.0144,  0.0142,  0.0020,
        -0.0153, -0.0100,  0.0225,  0.0021, -0.0092,  0.0175,  0.0346,  0.0217,
        -0.0244,  0.0340,  0.0142,  0.0009,  0.0054,  0.0105, -0.0213, -0.0084,
        -0.0166, -0.0182,  0.0252, -0.0185,  0.0004, -0.0082,  0.0173,  0.0210,
         0.0083,  0.0090,  0.0130, -0.0079, -0.0015, -0.0037, -0.0185, -0.0254,
        -0.0385,  0.0195,  0.0159,  0.0178,  0.0072,  0.0212, -0.0006, -0.0103,
        -0.0163,  0.0068, -0.0087,  0.0016, -0.0048, -0.0140,  0.0152,  0.0078,
        -0.0148,  0.0151, -0.0207,  0.0151, -0.0344,  0.0186,  0.0099,  0.0351,
         0.0254,  0.0444,  0.0213, -0.0064, -0.0488, -0.0292,  0.0073, -0.0276,
        -0.0056, -0.0302, -0.0243,  0.0071,  0.0246, -0.0097,  0.0188,  0.0185,
        -0.0401,  0.0421, -0.0024,  0.0047, -0.0073, -0.0259,  0.0066,  0.0022,
        -0.0059,  0.0038,  0.0025, -0.0122, -0.0008, -0.0068, -0.0137,  0.0016,
        -0.0092, -0.0056, -0.0142, -0.0115, -0.0185,  0.0196,  0.0171, -0.0008],
       device='cuda:0')
In [82]:
Copied!
model.gene_encoder.embedding.weight[20]
model.gene_encoder.embedding.weight[20]
Out[82]:
tensor([-0.0049, -0.0209,  0.0067, -0.0165, -0.0013, -0.0163,  0.0099,  0.0014,
         0.0046,  0.0236,  0.0029,  0.0037, -0.0082,  0.0207,  0.0136,  0.0052,
         0.0035, -0.0023,  0.0339,  0.0160, -0.0114,  0.0137,  0.0289,  0.0159,
        -0.0129,  0.0246, -0.0119, -0.0132,  0.0026,  0.0144,  0.0142,  0.0020,
        -0.0153, -0.0100,  0.0225,  0.0021, -0.0092,  0.0175,  0.0346,  0.0217,
        -0.0244,  0.0340,  0.0142,  0.0009,  0.0054,  0.0105, -0.0213, -0.0084,
        -0.0166, -0.0182,  0.0252, -0.0185,  0.0004, -0.0082,  0.0173,  0.0210,
         0.0083,  0.0090,  0.0130, -0.0079, -0.0015, -0.0037, -0.0185, -0.0254,
        -0.0385,  0.0195,  0.0159,  0.0178,  0.0072,  0.0212, -0.0006, -0.0103,
        -0.0163,  0.0068, -0.0087,  0.0016, -0.0048, -0.0140,  0.0152,  0.0078,
        -0.0148,  0.0151, -0.0207,  0.0151, -0.0344,  0.0186,  0.0099,  0.0351,
         0.0254,  0.0444,  0.0213, -0.0064, -0.0488, -0.0292,  0.0073, -0.0276,
        -0.0056, -0.0302, -0.0243,  0.0071,  0.0246, -0.0097,  0.0188,  0.0185,
        -0.0401,  0.0421, -0.0024,  0.0047, -0.0073, -0.0259,  0.0066,  0.0022,
        -0.0059,  0.0038,  0.0025, -0.0122, -0.0008, -0.0068, -0.0137,  0.0016,
        -0.0092, -0.0056, -0.0142, -0.0115, -0.0185,  0.0196,  0.0171, -0.0008],
       device='cuda:0')
In [119]:
Copied!
sembeddings[20]
sembeddings[20]
Out[119]:
tensor([ 6.6611e-03,  3.2946e-02,  1.2063e-02,  1.3155e-02, -6.7928e-03,
         3.0272e-02,  1.2692e-02, -5.3972e-03,  1.9142e-02, -2.3445e-03,
        -2.7137e-03,  9.3737e-03, -4.7150e-03,  2.5329e-02, -2.0919e-02,
         3.3505e-02,  1.8250e-02,  1.1739e-02,  2.7701e-02, -1.4182e-02,
         7.5865e-03, -1.8503e-02,  4.2267e-03, -3.1029e-01,  1.1854e-02,
         3.5789e-02, -1.7974e-02, -3.6900e-03,  3.4248e-02, -2.0346e-02,
         2.4250e-02, -3.4916e-02,  1.7738e-02,  9.9715e-03, -1.4236e-02,
         8.7822e-03, -1.9749e-03,  2.2516e-02, -5.4434e-03,  8.0317e-04,
        -9.3315e-03,  1.0459e-02,  2.9404e-02, -2.3727e-02, -1.1082e-02,
         1.0306e-02,  3.4731e-02, -1.6314e-02,  6.2507e-03, -1.3389e-02,
         3.1514e-02,  3.6710e-04, -2.6332e-02, -8.5840e-04,  8.3556e-03,
        -4.2249e-02,  2.4041e-03,  1.5509e-03,  5.7861e-02,  1.9756e-02,
         6.6668e-03, -9.0279e-03, -1.2321e-02,  1.2319e-02,  3.5193e-02,
         5.4685e-02, -2.1232e-02,  5.7735e-03, -4.9492e-03,  6.1288e-02,
        -2.1482e-02,  7.8210e-03,  4.7659e-02,  1.3722e-01,  7.4602e-03,
         3.4081e-03,  1.6790e-02, -4.4877e-02, -2.6832e-03,  1.5035e-02,
         1.2683e-03,  9.2635e-03, -1.1464e-02,  2.5933e-02,  1.2168e-02,
         1.8979e-02,  2.0139e-02,  5.9712e-02,  2.7476e-02,  1.0938e-02,
         2.1327e-02, -1.3357e-02, -1.8981e-02, -2.1589e-03,  2.0913e-02,
         1.9166e-02, -3.0249e-02,  2.7663e-02,  1.4311e-02, -4.3504e-03,
        -8.7722e-03, -6.1208e-03,  5.8541e-03,  4.7430e-02,  1.9528e-02,
        -2.6761e-03,  1.3365e-02, -2.0641e-02,  8.3008e-03,  9.7633e-03,
         1.9733e-02, -1.3637e-02, -1.5420e-02,  4.2381e-02,  3.0103e-02,
         2.0623e-02, -5.0322e-01,  8.5753e-03, -7.8186e-03,  4.5284e-02,
        -3.0636e-03,  3.3920e-02, -2.1339e-02,  1.8148e-02,  8.7961e-03,
        -1.2559e-02,  3.1327e-02,  6.8908e-03], dtype=torch.float64)
In [3]:
Copied!
model.genes[0]
model.genes[0]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[3], line 1
----> 1 model.genes[0]

NameError: name 'model' is not defined
In [4]:
Copied!
# Restore the trained model from a Lightning checkpoint. precpt_gene_emb points
# at the precomputed gene-embedding parquet used to initialize the gene encoder.
#model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/urxxwu28/checkpoints/epoch=0-step=20000.ckpt') #chocolate-surf-8
model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt', precpt_gene_emb = '../../data/temp/embeddings.parquet') #lambent night
#model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/urxxwu28/checkpoints/epoch=0-step=20000.ckpt') #chocolate-surf-8 model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt', precpt_gene_emb = '../../data/temp/embeddings.parquet') #lambent night
scPrint(
  (gene_encoder): GeneEncoder(
    (embedding): Embedding(33890, 128)
    (enc_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (expr_encoder): ContinuousValueEncoder(
    (linear1): Linear(in_features=1, out_features=128, bias=True)
    (activation): ReLU()
    (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (label_encoder): CategoryValueEncoder(
    (embedding): Embedding(8, 128)
    (enc_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  )
  (time_encoder): ContinuousValueEncoder(
    (linear1): Linear(in_features=1, out_features=128, bias=True)
    (activation): ReLU()
    (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): FlashTransformerEncoder(
    (blocks): ModuleList(
      (0-3): 4 x Block(
        (mixer): MHA(
          (Wqkv): Linear(in_features=128, out_features=384, bias=True)
          (inner_attn): FlashSelfAttention()
          (inner_cross_attn): FlashCrossAttention(
            (drop): Dropout(p=0.1, inplace=False)
          )
          (out_proj): Linear(in_features=128, out_features=128, bias=True)
        )
        (dropout1): Dropout(p=0.1, inplace=False)
        (drop_path1): StochasticDepth(p=0.0, mode=row)
        (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=128, out_features=512, bias=True)
          (activation): GELU(approximate='none')
          (fc2): Linear(in_features=512, out_features=128, bias=True)
        )
        (dropout2): Dropout(p=0.1, inplace=False)
        (drop_path2): StochasticDepth(p=0.0, mode=row)
        (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
      )
    )
    (dropout): Dropout(p=0.1, inplace=False)
    (drop_path): StochasticDepth(p=0.0, mode=row)
    (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
  )
  (expr_decoder): ExprDecoder(
    (fc): Sequential(
      (0): Linear(in_features=128, out_features=128, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
      (2): Dropout(p=0.1, inplace=False)
    )
    (finalfc): Sequential(
      (0): Linear(in_features=128, out_features=128, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
    )
    (depth_encoder): Sequential(
      (0): ContinuousValueEncoder(
        (linear1): Linear(in_features=1, out_features=128, bias=True)
        (activation): ReLU()
        (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): Linear(in_features=128, out_features=128, bias=True)
      (2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (3): LeakyReLU(negative_slope=0.01)
      (4): Dropout(p=0.1, inplace=False)
    )
    (pred_var_zero): Linear(in_features=128, out_features=3, bias=True)
    (depth_fc): Sequential(
      (0): Linear(in_features=128, out_features=1, bias=True)
      (1): ReLU()
    )
  )
  (cls_decoders): ModuleDict(
    (cell_type_ontology_term_id): ClsDecoder(
      (decoder): Sequential(
        (0): Linear(in_features=128, out_features=128, bias=True)
        (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (2): ReLU()
        (3): Dropout(p=0.1, inplace=False)
      )
      (out_layer): Linear(in_features=128, out_features=190, bias=True)
    )
    (disease_ontology_term_id): ClsDecoder(
      (decoder): Sequential(
        (0): Linear(in_features=128, out_features=128, bias=True)
        (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (2): ReLU()
        (3): Dropout(p=0.1, inplace=False)
      )
      (out_layer): Linear(in_features=128, out_features=18, bias=True)
    )
    (assay_ontology_term_id): ClsDecoder(
      (decoder): Sequential(
        (0): Linear(in_features=128, out_features=128, bias=True)
        (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (2): ReLU()
        (3): Dropout(p=0.1, inplace=False)
      )
      (out_layer): Linear(in_features=128, out_features=11, bias=True)
    )
    (self_reported_ethnicity_ontology_term_id): ClsDecoder(
      (decoder): Sequential(
        (0): Linear(in_features=128, out_features=128, bias=True)
        (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (2): ReLU()
        (3): Dropout(p=0.1, inplace=False)
      )
      (out_layer): Linear(in_features=128, out_features=7, bias=True)
    )
    (sex_ontology_term_id): ClsDecoder(
      (decoder): Sequential(
        (0): Linear(in_features=128, out_features=128, bias=True)
        (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (2): ReLU()
        (3): Dropout(p=0.1, inplace=False)
      )
      (out_layer): Linear(in_features=128, out_features=2, bias=True)
    )
    (organism_ontology_term_id): ClsDecoder(
      (decoder): Sequential(
        (0): Linear(in_features=128, out_features=128, bias=True)
        (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (2): ReLU()
        (3): Dropout(p=0.1, inplace=False)
      )
      (out_layer): Linear(in_features=128, out_features=2, bias=True)
    )
  )
  (mvc_decoder): MVCDecoder(
    (depth_encoder): Sequential(
      (0): ContinuousValueEncoder(
        (linear1): Linear(in_features=1, out_features=128, bias=True)
        (activation): ReLU()
        (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): Linear(in_features=128, out_features=128, bias=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (depth_fc): Sequential(
      (0): Linear(in_features=128, out_features=128, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
      (2): Linear(in_features=128, out_features=1, bias=True)
      (3): ReLU()
    )
    (gene2query): Linear(in_features=128, out_features=128, bias=True)
    (query_activation): Sigmoid()
    (pred_var_zero): Linear(in_features=128, out_features=384, bias=False)
  )
)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[4], line 2
      1 #model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/urxxwu28/checkpoints/epoch=0-step=20000.ckpt') #chocolate-surf-8
----> 2 model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt', precpt_gene_emb = '../../data/temp/embeddings.parquet') #lambent night

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/core/module.py:1543, in LightningModule.load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
   1463 @classmethod
   1464 def load_from_checkpoint(
   1465     cls,
   (...)
   1470     **kwargs: Any,
   1471 ) -> Self:
   1472     r"""
   1473     Primary way of loading a model from a checkpoint. When Lightning saves a checkpoint
   1474     it stores the arguments passed to ``__init__``  in the checkpoint under ``"hyper_parameters"``.
   (...)
   1541         y_hat = pretrained_model(x)
   1542     """
-> 1543     loaded = _load_from_checkpoint(
   1544         cls,
   1545         checkpoint_path,
   1546         map_location,
   1547         hparams_file,
   1548         strict,
   1549         **kwargs,
   1550     )
   1551     return cast(Self, loaded)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/core/saving.py:91, in _load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
     89     return _load_state(cls, checkpoint, **kwargs)
     90 if issubclass(cls, pl.LightningModule):
---> 91     model = _load_state(cls, checkpoint, strict=strict, **kwargs)
     92     state_dict = checkpoint["state_dict"]
     93     if not state_dict:

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/core/saving.py:157, in _load_state(cls, checkpoint, strict, **cls_kwargs_new)
    155 # load the state_dict on the model automatically
    156 assert strict is not None
--> 157 keys = obj.load_state_dict(checkpoint["state_dict"], strict=strict)
    159 if not strict:
    160     if keys.missing_keys:

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/torch/nn/modules/module.py:2041, in Module.load_state_dict(self, state_dict, strict)
   2036         error_msgs.insert(
   2037             0, 'Missing key(s) in state_dict: {}. '.format(
   2038                 ', '.join('"{}"'.format(k) for k in missing_keys)))
   2040 if len(error_msgs) > 0:
-> 2041     raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
   2042                        self.__class__.__name__, "\n\t".join(error_msgs)))
   2043 return _IncompatibleKeys(missing_keys, unexpected_keys)

RuntimeError: Error(s) in loading state_dict for scPrint:
	Missing key(s) in state_dict: "expr_decoder.depth_encoder.2.weight", "expr_decoder.depth_encoder.2.bias", "cls_decoders.cell_type_ontology_term_id.decoder.0.weight", "cls_decoders.cell_type_ontology_term_id.decoder.0.bias", "cls_decoders.cell_type_ontology_term_id.decoder.1.weight", "cls_decoders.cell_type_ontology_term_id.decoder.1.bias", "cls_decoders.disease_ontology_term_id.decoder.0.weight", "cls_decoders.disease_ontology_term_id.decoder.0.bias", "cls_decoders.disease_ontology_term_id.decoder.1.weight", "cls_decoders.disease_ontology_term_id.decoder.1.bias", "cls_decoders.assay_ontology_term_id.decoder.0.weight", "cls_decoders.assay_ontology_term_id.decoder.0.bias", "cls_decoders.assay_ontology_term_id.decoder.1.weight", "cls_decoders.assay_ontology_term_id.decoder.1.bias", "cls_decoders.self_reported_ethnicity_ontology_term_id.decoder.0.weight", "cls_decoders.self_reported_ethnicity_ontology_term_id.decoder.0.bias", "cls_decoders.self_reported_ethnicity_ontology_term_id.decoder.1.weight", "cls_decoders.self_reported_ethnicity_ontology_term_id.decoder.1.bias", "cls_decoders.sex_ontology_term_id.decoder.0.weight", "cls_decoders.sex_ontology_term_id.decoder.0.bias", "cls_decoders.sex_ontology_term_id.decoder.1.weight", "cls_decoders.sex_ontology_term_id.decoder.1.bias", "cls_decoders.organism_ontology_term_id.decoder.0.weight", "cls_decoders.organism_ontology_term_id.decoder.0.bias", "cls_decoders.organism_ontology_term_id.decoder.1.weight", "cls_decoders.organism_ontology_term_id.decoder.1.bias". 
	Unexpected key(s) in state_dict: "cls_decoders.cell_type_ontology_term_id._decoder.0.weight", "cls_decoders.cell_type_ontology_term_id._decoder.0.bias", "cls_decoders.cell_type_ontology_term_id._decoder.1.weight", "cls_decoders.cell_type_ontology_term_id._decoder.1.bias", "cls_decoders.disease_ontology_term_id._decoder.0.weight", "cls_decoders.disease_ontology_term_id._decoder.0.bias", "cls_decoders.disease_ontology_term_id._decoder.1.weight", "cls_decoders.disease_ontology_term_id._decoder.1.bias", "cls_decoders.assay_ontology_term_id._decoder.0.weight", "cls_decoders.assay_ontology_term_id._decoder.0.bias", "cls_decoders.assay_ontology_term_id._decoder.1.weight", "cls_decoders.assay_ontology_term_id._decoder.1.bias", "cls_decoders.self_reported_ethnicity_ontology_term_id._decoder.0.weight", "cls_decoders.self_reported_ethnicity_ontology_term_id._decoder.0.bias", "cls_decoders.self_reported_ethnicity_ontology_term_id._decoder.1.weight", "cls_decoders.self_reported_ethnicity_ontology_term_id._decoder.1.bias", "cls_decoders.sex_ontology_term_id._decoder.0.weight", "cls_decoders.sex_ontology_term_id._decoder.0.bias", "cls_decoders.sex_ontology_term_id._decoder.1.weight", "cls_decoders.sex_ontology_term_id._decoder.1.bias", "cls_decoders.organism_ontology_term_id._decoder.0.weight", "cls_decoders.organism_ontology_term_id._decoder.0.bias", "cls_decoders.organism_ontology_term_id._decoder.1.weight", "cls_decoders.organism_ontology_term_id._decoder.1.bias". 
In [4]:
Copied!
ensembl = load_genes(['NCBITaxon:9606'])
ensembl['ensembl_gene_id'] = ensembl.index
ensembl = load_genes(['NCBITaxon:9606']) ensembl['ensembl_gene_id'] = ensembl.index

Loading the data¶

In [ ]:
Copied!
# adata = sc.read(...)  # only this dataset was dropped
# adata = sc.read(...)  # only this dataset was dropped
In [110]:
Copied!
adata = sc.read(
    "data/pancreas_atlas.h5ad",
    backup_url="https://figshare.com/ndownloader/files/24539828",
)
adata = sc.read( "data/pancreas_atlas.h5ad", backup_url="https://figshare.com/ndownloader/files/24539828", )
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/__init__.py:51: FutureWarning: `anndata.read` is deprecated, use `anndata.read_h5ad` instead. `ad.read` will be removed in mid 2024.
  warnings.warn(
In [93]:
Copied!
adata.X.sum(0)
adata.X.sum(0)
Out[93]:
array([ 6171.3677, 17159.143 ,  2168.623 , ..., 17207.316 , 11868.23  ,
       11490.851 ], dtype=float32)
In [19]:
Copied!
#adata = sc.read(
#    "data/lung_atlas.h5ad",
#    backup_url="https://figshare.com/ndownloader/files/24539942",
#)
#adata = sc.read( # "data/lung_atlas.h5ad", # backup_url="https://figshare.com/ndownloader/files/24539942", #)
In [20]:
Copied!
#adata = sc.read_h5ad('/home/ml4ig1/scprint/.lamindb/BljRloq1xjcxRNDpejzI.h5ad')
#adata = sc.read_h5ad('/home/ml4ig1/scprint/.lamindb/BljRloq1xjcxRNDpejzI.h5ad')
In [21]:
Copied!
Preprocessor()(adata.copy())
# cannot preprocess (the data is not raw counts, and it doesn't have standardized values)
Preprocessor()(adata.copy()) # cannot preprocess (the data is not raw counts, and it doesn't have standardized values)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[21], line 1
----> 1 Preprocessor()(adata.copy())
      2 # cannot preprocess (first of all not raw counts and also doesn't have standardized values)

File ~/Documents code/scDataLoader/scdataloader/preprocess.py:114, in Preprocessor.__call__(self, adata)
    110 # check that it is a count
    111 if (
    112     adata.X.astype(int).sum() != adata.X.sum() and not self.force_preprocess
    113 ):  # check if likely raw data
--> 114     raise ValueError(
    115         "Data is not raw counts, please check layers, find raw data, or bypass with force_preprocess"
    116     )
    117     # please check layers
    118     # if not available count drop
    119 # # cleanup and dropping low expressed genes and unexpressed cells
    120 prevsize = adata.shape[0]

ValueError: Data is not raw counts, please check layers, find raw data, or bypass with force_preprocess
In [111]:
Copied!
# You need to use counts
adata.X = adata.layers['counts']
del adata.layers
# You need to use counts adata.X = adata.layers['counts'] del adata.layers
In [112]:
Copied!
# you need to have ensembl gene ids
# removing the other ones
var = adata.var.merge(ensembl.drop_duplicates('symbol').set_index('symbol', drop=False), left_index=True, right_index=True, how='inner').sort_values(by="ensembl_gene_id").set_index('ensembl_gene_id')
adata = adata[:, var['symbol']]
adata.var = var

# adding back the missing genes
unseen = set(ensembl.index) - set(adata.var.index)
# adding them to adata
emptyda = ad.AnnData(
    csr_matrix((adata.shape[0], len(unseen)), dtype=np.float32),
    var=pd.DataFrame(index=list(unseen)),
    obs=pd.DataFrame(index=adata.obs.index),
)
adata = ad.concat([adata, emptyda], axis=1, join="outer", merge="only")
# TODO: turn this into a proper validation function
adata.uns["unseen_genes"] = list(unseen)
adata = adata[:, adata.var.sort_index().index]

# Add at least the organism you are working with
adata.obs['organism_ontology_term_id'] = "NCBITaxon:9606"
adata
# you need to have ensembl gene ids # # removing the other ones var = adata.var.merge(ensembl.drop_duplicates('symbol').set_index('symbol', drop=False), left_index=True, right_index=True, how='inner').sort_values(by="ensembl_gene_id").set_index('ensembl_gene_id') adata = adata[:, var['symbol']] adata.var = var # adding back the missing genes unseen = set(ensembl.index) - set(adata.var.index) # adding them to adata emptyda = ad.AnnData( csr_matrix((adata.shape[0], len(unseen)), dtype=np.float32), var=pd.DataFrame(index=list(unseen)), obs=pd.DataFrame(index=adata.obs.index), ) adata = ad.concat([adata, emptyda], axis=1, join="outer", merge="only") # do a validation function adata.uns["unseen_genes"] = list(unseen) adata = adata[:, adata.var.sort_index().index] # Add at least the organism you are working with adata.obs['organism_ontology_term_id'] = "NCBITaxon:9606" adata
/tmp/ipykernel_21803/632984386.py:21: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.
  adata.obs['organism_ontology_term_id'] = "NCBITaxon:9606"
Out[112]:
AnnData object with n_obs × n_vars = 16382 × 70116
    obs: 'tech', 'celltype', 'size_factors', 'organism_ontology_term_id'
    var: 'uid', 'symbol', 'stable_id', 'ncbi_gene_ids', 'biotype', 'description', 'synonyms', 'organism_id', 'public_source_id', 'created_at', 'updated_at', 'created_by_id', 'mt', 'ribo', 'hb', 'organism'
    uns: 'unseen_genes'
In [113]:
Copied!
adataset = SimpleAnnDataset(adata, obs_to_output=['organism_ontology_term_id'])
col = Collator(organisms=["NCBITaxon:9606",], valid_genes=model.genes, how="most expr", max_len=1000,add_zero_genes=100)#mdataset.encoder['organism_ontology_term_id'])
dataloader = DataLoader(adataset, collate_fn=col, batch_size=64, num_workers=4, shuffle=False)
adataset = SimpleAnnDataset(adata, obs_to_output=['organism_ontology_term_id']) col = Collator(organisms=["NCBITaxon:9606",], valid_genes=model.genes, how="most expr", max_len=1000,add_zero_genes=100)#mdataset.encoder['organism_ontology_term_id']) dataloader = DataLoader(adataset, collate_fn=col, batch_size=64, num_workers=4, shuffle=False)
In [29]:
Copied!
#from scdataloader import AnnDataCollator
#from anndata.experimental import AnnLoader
#dataloader = AnnLoader([adata], collate_fn=AnnDataCollator(max_len=5000, organisms=["NCBITaxon:9606",], org_to_id={'NCBITaxon:9606': 'NCBITaxon:9606'}, how="most expr", logp1=True), batch_size=32, num_workers=4)
#from scdataloader import AnnDataCollator #from anndata.experimental import AnnLoader #dataloader = AnnLoader([adata], collate_fn=AnnDataCollator(max_len=5000, organisms=["NCBITaxon:9606",], org_to_id={'NCBITaxon:9606': 'NCBITaxon:9606'}, how="most expr", logp1=True), batch_size=32, num_workers=4)
In [114]:
Copied!
trainer = Trainer(precision=16)
trainer = Trainer(precision=16)
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/fabric/connector.py:554: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
  rank_zero_warn(
INFO: Using 16bit Automatic Mixed Precision (AMP)
2024-02-27 14:54:08,590:INFO - Using 16bit Automatic Mixed Precision (AMP)
INFO: Using 16bit Automatic Mixed Precision (AMP)
2024-02-27 14:54:08,590:INFO - Using 16bit Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
2024-02-27 14:54:08,622:INFO - GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
2024-02-27 14:54:08,625:INFO - TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
2024-02-27 14:54:08,628:INFO - IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
2024-02-27 14:54:08,630:INFO - HPU available: False, using: 0 HPUs
In [32]:
Copied!
model.labels, model.pred_embedding
model.labels, model.pred_embedding
Out[32]:
(['cell_type_ontology_term_id',
  'disease_ontology_term_id',
  'assay_ontology_term_id',
  'self_reported_ethnicity_ontology_term_id',
  'sex_ontology_term_id',
  'organism_ontology_term_id'],
 ['cell_type_ontology_term_id',
  'disease_ontology_term_id',
  'self_reported_ethnicity_ontology_term_id',
  'sex_ontology_term_id'])
In [115]:
Copied!
model.pred_embedding = [
    "cell_type_ontology_term_id",
    "disease_ontology_term_id",
    "self_reported_ethnicity_ontology_term_id",
    "sex_ontology_term_id",
]
model.pred_embedding = [ "cell_type_ontology_term_id", "disease_ontology_term_id", "self_reported_ethnicity_ontology_term_id", "sex_ontology_term_id", ]
In [12]:
Copied!
adata.obs['tech'].value_counts()
adata.obs['tech'].value_counts()
Out[12]:
tech
inDrop3       3605
smartseq2     2394
celseq2       2285
inDrop1       1937
inDrop2       1724
smarter       1492
inDrop4       1303
celseq        1004
fluidigmc1     638
Name: count, dtype: int64
In [13]:
Copied!
adata.obs.celltype.value_counts()
adata.obs.celltype.value_counts()
Out[13]:
celltype
alpha                 5493
beta                  4169
ductal                2142
acinar                1669
delta                 1055
gamma                  699
activated_stellate     464
endothelial            313
quiescent_stellate     193
macrophage              79
mast                    42
epsilon                 32
schwann                 25
t_cell                   7
Name: count, dtype: int64
In [116]:
Copied!
predictions = trainer.predict(model, dataloader)
predictions = trainer.predict(model, dataloader)
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
2024-02-27 14:54:17,071:INFO - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting: 0it [00:00, ?it/s]
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/loops/prediction_loop.py:234: UserWarning: predict returned None if it was on purpose, ignore this warning...
  self._warning_cache.warn("predict returned None if it was on purpose, ignore this warning...")
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/_core/anndata.py:183: ImplicitModificationWarning: Transforming to str index.
  warnings.warn("Transforming to str index.", ImplicitModificationWarning)
WARNING: You’re trying to run this on 128 dimensions of `.X`, if you really want this, set `use_rep='X'`.
         Falling back to preprocessing with `sc.pp.pca` and default params.
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/_core/anndata.py:522: FutureWarning: The dtype argument is deprecated and will be removed in late 2024.
  warnings.warn(
AnnData object with n_obs × n_vars = 16382 × 128
    obs: 'pred_cell_type_ontology_term_id', 'pred_disease_ontology_term_id', 'pred_assay_ontology_term_id', 'pred_self_reported_ethnicity_ontology_term_id', 'pred_sex_ontology_term_id', 'pred_organism_ontology_term_id', 'leiden'
    uns: 'neighbors', 'umap', 'leiden'
    obsm: 'X_pca', 'X_umap'
    obsp: 'distances', 'connectivities'
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
couldn't log to tensorboard
couldn't log to wandb
No description has been provided for this image
In [65]:
Copied!
predictions = trainer.predict(model, dataloader)
predictions = trainer.predict(model, dataloader)
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
2024-02-20 17:26:30,709:INFO - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:442: PossibleUserWarning: The dataloader, predict_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 40 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.
  rank_zero_warn(
Predicting: 0it [00:00, ?it/s]
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/_core/anndata.py:183: ImplicitModificationWarning: Transforming to str index.
  warnings.warn("Transforming to str index.", ImplicitModificationWarning)
WARNING: You’re trying to run this on 128 dimensions of `.X`, if you really want this, set `use_rep='X'`.
         Falling back to preprocessing with `sc.pp.pca` and default params.
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/_core/anndata.py:522: FutureWarning: The dtype argument is deprecated and will be removed in late 2024.
  warnings.warn(
AnnData object with n_obs × n_vars = 10112 × 128
    obs: 'pred_cell_type_ontology_term_id', 'pred_disease_ontology_term_id', 'pred_assay_ontology_term_id', 'pred_self_reported_ethnicity_ontology_term_id', 'pred_sex_ontology_term_id', 'pred_organism_ontology_term_id', 'leiden'
    uns: 'neighbors', 'umap', 'leiden'
    obsm: 'X_pca', 'X_umap'
    obsp: 'distances', 'connectivities'
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
couldn't log to tensorboard
couldn't log to wandb
No description has been provided for this image
In [103]:
Copied!
pred_adata = sc.read_h5ad(
    (model.logger.save_dir if model.logger is not None else "/tmp")
    + "/step_"
    + str(model.global_step)
    + "_"
    +""
    + ".h5ad"
)
pred_adata = sc.read_h5ad( (model.logger.save_dir if model.logger is not None else "/tmp") + "/step_" + str(model.global_step) + "_" +"" + ".h5ad" )
In [104]:
Copied!
pred_adata.obs
pred_adata.obs
Out[104]:
pred_cell_type_ontology_term_id pred_disease_ontology_term_id pred_assay_ontology_term_id pred_self_reported_ethnicity_ontology_term_id pred_sex_ontology_term_id pred_organism_ontology_term_id leiden
0 CL:0011020 PATO:0000461 EFO:0008722 HANCESTRO:0005 PATO:0000383 NCBITaxon:9606 3
1 CL:0002138 PATO:0000461 EFO:0030007 HANCESTRO:0005 PATO:0000383 NCBITaxon:9606 6
2 CL:0011020 PATO:0000461 EFO:0030007 HANCESTRO:0005 PATO:0000383 NCBITaxon:9606 6
3 CL:1000271 PATO:0000461 EFO:0030007 HANCESTRO:0005 PATO:0000383 NCBITaxon:9606 21
4 CL:0011020 PATO:0000461 EFO:0030059 HANCESTRO:0005 PATO:0000383 NCBITaxon:9606 14
... ... ... ... ... ... ... ...
16377 CL:1000271 PATO:0000461 EFO:0030007 HANCESTRO:0005 PATO:0000383 NCBITaxon:9606 6
16378 CL:0000128 PATO:0000461 EFO:0009899 HANCESTRO:0005 PATO:0000384 NCBITaxon:9606 23
16379 CL:0002138 PATO:0000461 EFO:0030007 HANCESTRO:0005 PATO:0000383 NCBITaxon:9606 21
16380 CL:0011020 PATO:0000461 EFO:0030059 HANCESTRO:0005 PATO:0000383 NCBITaxon:9606 8
16381 CL:0011020 PATO:0000461 EFO:0030007 HANCESTRO:0005 PATO:0000383 NCBITaxon:9606 8

16382 rows × 7 columns

In [16]:
Copied!
pred_adata.obs
pred_adata.obs
Out[16]:
pred_cell_type_ontology_term_id pred_disease_ontology_term_id pred_assay_ontology_term_id pred_self_reported_ethnicity_ontology_term_id pred_sex_ontology_term_id pred_organism_ontology_term_id leiden
0 CL:0000171 PATO:0000461 EFO:0030002 HANCESTRO:0027 PATO:0000383 NCBITaxon:9606 13
1 CL:0000765 PATO:0000461 EFO:0030002 HANCESTRO:0027 PATO:0000384 NCBITaxon:9606 1
2 CL:0000171 PATO:0000461 EFO:0009899 HANCESTRO:0005 PATO:0000384 NCBITaxon:9606 1
3 CL:0000155 PATO:0000461 EFO:0030002 HANCESTRO:0027 PATO:0000384 NCBITaxon:9606 1
4 CL:1000271 PATO:0000461 EFO:0011025 HANCESTRO:0027 PATO:0000383 NCBITaxon:9606 6
... ... ... ... ... ... ... ...
10011 CL:0000155 PATO:0000461 EFO:0030002 HANCESTRO:0027 PATO:0000383 NCBITaxon:9606 0
10012 CL:1000343 PATO:0000461 EFO:0009899 HANCESTRO:0005 PATO:0000384 NCBITaxon:9606 15
10013 CL:0002064 PATO:0000461 EFO:0009899 HANCESTRO:0005 PATO:0000384 NCBITaxon:9606 15
10014 CL:0002064 PATO:0000461 EFO:0009899 HANCESTRO:0005 PATO:0000384 NCBITaxon:9606 15
10015 CL:0002064 PATO:0000461 EFO:0030002 HANCESTRO:0027 PATO:0000384 NCBITaxon:9606 15

10016 rows × 7 columns

In [105]:
Copied!
expr = np.array(model.expr_pred[0])
expr = np.array(model.expr_pred[0])
In [106]:
Copied!
expr
expr
Out[106]:
array([[  5.4566784,   4.6118417,   4.0348144, ...,   2.1848714,
          3.9399052,   2.5233817],
       [  3.208337 ,   2.5515165,   1.9921018, ...,  13.936381 ,
         13.130431 ,   6.312366 ],
       [  4.8954573,   5.087985 ,   3.9134948, ...,   2.8555708,
          4.554164 , 101.15237  ],
       ...,
       [  3.984802 ,   4.126377 ,   3.6083279, ..., 115.68789  ,
         57.15791  ,  16.6339   ],
       [  5.9352045,   4.8548284,   5.2390785, ...,   1.6146911,
         36.804073 ,  55.141655 ],
       [  5.808983 ,   4.903611 ,   3.8128808, ...,   7.2152896,
          4.651722 ,   2.080887 ]], dtype=float32)
In [68]:
Copied!
expr.shape
expr.shape
Out[68]:
array([[131.65997   ,  29.549952  ,  19.607767  , ...,   1.1995907 ,
          7.7010403 ,   0.73472875],
       [352.7837    ,  88.84994   ,  61.664867  , ...,   8.065009  ,
          1.6457222 ,   1.1151838 ],
       [179.33528   ,  57.207157  ,  43.435993  , ...,   3.1727197 ,
          2.1062756 ,   0.9593918 ],
       ...,
       [223.08798   , 118.48114   ,  54.776947  , ...,   1.082682  ,
          1.2685677 ,   2.6607585 ],
       [130.19698   ,  97.51303   ,  44.644714  , ...,   3.5467129 ,
          2.2846248 ,   0.8338242 ],
       [184.8851    ,  42.39703   ,  12.98097   , ...,  40.693264  ,
         37.05164   ,   2.698467  ]], dtype=float32)
In [107]:
Copied!
expr = np.array(model.expr_pred[0])

expr[np.random.binomial(1, p=np.array(torch.nn.functional.sigmoid(model.expr_pred[2].to(torch.float32)))).astype(bool)] = 0

#expr[expr<=0.5] = 0
#expr[(expr<=1) & (expr>0.5)] = 1
expr = np.array(model.expr_pred[0]) expr[np.random.binomial(1, p=np.array(torch.nn.functional.sigmoid(model.expr_pred[2].to(torch.float32)))).astype(bool)] = 0 #expr[expr<=0.5] = 0 #expr[(expr<=1) & (expr>0.5)] = 1
In [108]:
Copied!
for i in dataloader:
    print(i["x"].shape)
    break
for i in dataloader: print(i["x"].shape) break
torch.Size([64, 1100])

In [1]:
Copied!
size = 64  # dataloader batch size (see torch.Size([64, 1100]) above); expr[:size] aligns with batch i["x"]
size = 64  # dataloader batch size (see torch.Size([64, 1100]) above); expr[:size] aligns with batch i["x"]
In [56]:
Copied!
expr[:size].mean(), expr[:size].max(), (expr[:size]==0).sum()
expr[:size].mean(), expr[:size].max(), (expr[:size]==0).sum()
Out[56]:
(4.1190968, 310.3621, 126)
In [71]:
Copied!
expr[:size].mean(), expr[:size].max(), (expr[:size]==0).sum()
expr[:size].mean(), expr[:size].max(), (expr[:size]==0).sum()
Out[71]:
(3.9165297, 923.2358, 899)
In [72]:
Copied!
i['x'].mean(), i['x'].max(), (i['x']==0).sum()
i['x'].mean(), i['x'].max(), (i['x']==0).sum()
Out[72]:
(tensor(5.8350), tensor(1597.0111), tensor(11357))
In [109]:
Copied!
import matplotlib.pyplot as plt
import numpy as np

# Compute correlation coefficient
corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :]

# Plot correlation coefficient
plt.figure(figsize=(10, 5))
plt.imshow(corr_coef, cmap='coolwarm', interpolation='none')
plt.colorbar()
plt.title('Correlation Coefficient of expr and i["x"]')
plt.show()
import matplotlib.pyplot as plt import numpy as np # Compute correlation coefficient corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :] # Plot correlation coefficient plt.figure(figsize=(10, 5)) plt.imshow(corr_coef, cmap='coolwarm', interpolation='none') plt.colorbar() plt.title('Correlation Coefficient of expr and i["x"]') plt.show()
Heatmap of correlation coefficients between the sampled predictions (expr) and the input batch counts (i["x"]).
In [73]:
Copied!
import matplotlib.pyplot as plt
import numpy as np

# Compute correlation coefficient
corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :]

# Plot correlation coefficient
plt.figure(figsize=(10, 5))
plt.imshow(corr_coef, cmap='coolwarm', interpolation='none')
plt.colorbar()
plt.title('Correlation Coefficient of expr and i["x"]')
plt.show()
import matplotlib.pyplot as plt import numpy as np # Compute correlation coefficient corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :] # Plot correlation coefficient plt.figure(figsize=(10, 5)) plt.imshow(corr_coef, cmap='coolwarm', interpolation='none') plt.colorbar() plt.title('Correlation Coefficient of expr and i["x"]') plt.show()
No description has been provided for this image
In [63]:
Copied!
import matplotlib.pyplot as plt
import numpy as np

# Compute correlation coefficient
corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :]

# Plot correlation coefficient
plt.figure(figsize=(10, 5))
plt.imshow(corr_coef, cmap='coolwarm', interpolation='none')
plt.colorbar()
plt.title('Correlation Coefficient of expr and i["x"]')
plt.show()
import matplotlib.pyplot as plt import numpy as np # Compute correlation coefficient corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :] # Plot correlation coefficient plt.figure(figsize=(10, 5)) plt.imshow(corr_coef, cmap='coolwarm', interpolation='none') plt.colorbar() plt.title('Correlation Coefficient of expr and i["x"]') plt.show()
No description has been provided for this image
In [77]:
Copied!
pred_adata
pred_adata
Out[77]:
AnnData object with n_obs × n_vars = 10112 × 128
    obs: 'pred_cell_type_ontology_term_id', 'pred_disease_ontology_term_id', 'pred_assay_ontology_term_id', 'pred_self_reported_ethnicity_ontology_term_id', 'pred_sex_ontology_term_id', 'pred_organism_ontology_term_id', 'leiden'
    uns: 'leiden', 'neighbors', 'pred_assay_ontology_term_id_colors', 'pred_cell_type_ontology_term_id_colors', 'pred_disease_ontology_term_id_colors', 'pred_organism_ontology_term_id_colors', 'pred_self_reported_ethnicity_ontology_term_id_colors', 'pred_sex_ontology_term_id_colors', 'umap'
    obsm: 'X_pca', 'X_umap'
    obsp: 'connectivities', 'distances'
In [19]:
Copied!
adata
adata
Out[19]:
AnnData object with n_obs × n_vars = 16382 × 70116
    obs: 'tech', 'celltype', 'size_factors', 'organism_ontology_term_id'
    var: 'uid', 'symbol', 'stable_id', 'ncbi_gene_ids', 'biotype', 'description', 'synonyms', 'organism_id', 'public_source_id', 'created_at', 'updated_at', 'created_by_id', 'mt', 'ribo', 'hb', 'organism'
    uns: 'unseen_genes'
In [24]:
Copied!
subadata = adata#[:10112] # 10016
subadata = adata#[:10112] # 10016
In [75]:
Copied!
sc.pp.pca(subadata)
sc.pp.pca(subadata)
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/preprocessing/_pca.py:229: ImplicitModificationWarning: Setting element `.obsm['X_pca']` of view, initializing view as actual.
  adata.obsm['X_pca'] = X_pca
In [25]:
Copied!
# Align pred_adata's obs index with subadata so the obs columns can be joined.
pred_adata.obs.index = subadata.obs.index
# NOTE(review): the Benchmarker cell below lists "Unintegrated" in
# embedding_obsm_keys; this line must be re-enabled (after computing PCA on
# subadata) or that cell will not find the baseline embedding.
#subadata.obsm["Unintegrated"] = subadata.obsm["X_pca"]
subadata.obsm["X_umap"] = pred_adata.obsm["X_umap"]
subadata.obsm["scprint"] = pred_adata.X
# NOTE(review): redundant — the index was already assigned above.
pred_adata.obs.index = subadata.obs.index
subadata.obs = pd.concat([subadata.obs, pred_adata.obs], axis=1)
pred_adata.obs.index = subadata.obs.index #subadata.obsm["Unintegrated"] = subadata.obsm["X_pca"] subadata.obsm["X_umap"] = pred_adata.obsm["X_umap"] subadata.obsm["scprint"] = pred_adata.X pred_adata.obs.index = subadata.obs.index subadata.obs = pd.concat([subadata.obs, pred_adata.obs], axis=1)
In [26]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
Out[26]:
celltype
acinar                0
activated_stellate    0
alpha                 0
beta                  0
delta                 0
ductal                0
endothelial           0
epsilon               0
gamma                 0
macrophage            0
mast                  0
quiescent_stellate    0
schwann               0
t_cell                0
Name: count, dtype: int64
In [81]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
Out[81]:
alpha                 210
beta                  169
acinar                 41
gamma                  22
ductal                 21
delta                  18
macrophage              2
activated_stellate      1
epsilon                 1
quiescent_stellate      1
endothelial             0
mast                    0
schwann                 0
t_cell                  0
Name: celltype, dtype: int64
In [33]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
Out[33]:
alpha                 221
beta                  164
ductal                 84
acinar                 26
gamma                  23
endothelial            16
delta                  12
quiescent_stellate      5
activated_stellate      4
macrophage              3
epsilon                 2
mast                    0
schwann                 0
t_cell                  0
Name: celltype, dtype: int64
In [82]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000169"].celltype.value_counts() #typeB
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000169"].celltype.value_counts() #typeB
Out[82]:
beta                  23
delta                  5
acinar                 3
alpha                  2
activated_stellate     0
ductal                 0
endothelial            0
epsilon                0
gamma                  0
macrophage             0
mast                   0
quiescent_stellate     0
schwann                0
t_cell                 0
Name: celltype, dtype: int64
In [32]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000169"].celltype.value_counts() #typeB
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000169"].celltype.value_counts() #typeB
Out[32]:
beta                  715
alpha                 399
delta                 142
gamma                  37
acinar                 24
ductal                  7
activated_stellate      2
epsilon                 2
endothelial             0
macrophage              0
mast                    0
quiescent_stellate      0
schwann                 0
t_cell                  0
Name: celltype, dtype: int64
In [83]:
Copied!
subadata.obs.loc[subadata.obs.celltype=="alpha", ['pred_cell_type_ontology_term_id']].value_counts().head(10)
subadata.obs.loc[subadata.obs.celltype=="alpha", ['pred_cell_type_ontology_term_id']].value_counts().head(10)
Out[83]:
pred_cell_type_ontology_term_id
CL:1000271                         1096
CL:0009002                          562
CL:0000158                          383
CL:0000171                          210
CL:0000155                          195
CL:0000583                          176
CL:0000670                          127
CL:0000890                          113
CL:0000765                           77
CL:0002063                           47
dtype: int64
In [ ]:
Copied!
subadata.obs.loc[subadata.obs.celltype=="acinar", ['pred_cell_type_ontology_term_id']].value_counts()
subadata.obs.loc[subadata.obs.celltype=="acinar", ['pred_cell_type_ontology_term_id']].value_counts()
In [24]:
Copied!
sc.pl.scatter(subadata, basis="umap", color=["celltype", "pred_cell_type_ontology_term_id", "tech", "pred_assay_ontology_term_id"])
sc.pl.scatter(subadata, basis="umap", color=["celltype", "pred_cell_type_ontology_term_id", "tech", "pred_assay_ontology_term_id"])
No description has been provided for this image
In [ ]:
Copied!

In [84]:
Copied!
# Benchmark the scPrint embedding against the pre-integration baseline with
# scib-metrics: batches are the sequencing technologies, labels the annotated
# cell types.
bm = Benchmarker(
    subadata,
    batch_key="tech",
    label_key="celltype",
    # both keys must already exist in subadata.obsm when this cell runs
    embedding_obsm_keys=["Unintegrated", "scprint"],
    n_jobs=6,
)
bm.benchmark()
bm = Benchmarker( subadata, batch_key="tech", label_key="celltype", embedding_obsm_keys=["Unintegrated", "scprint"], n_jobs=6, ) bm.benchmark()
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[84], line 8
      1 bm = Benchmarker(
      2     subadata,
      3     batch_key="tech",
   (...)
      6     n_jobs=6,
      7 )
----> 8 bm.benchmark()

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scib_metrics/benchmark/_core.py:205, in Benchmarker.benchmark(self)
    199     warnings.warn(
    200         "The benchmark has already been run. Running it again will overwrite the previous results.",
    201         UserWarning,
    202     )
    204 if not self._prepared:
--> 205     self.prepare()
    207 num_metrics = sum(
    208     [sum([v is not False for v in asdict(met_col)]) for met_col in self._metric_collection_dict.values()]
    209 )
    211 for emb_key, ad in tqdm(self._emb_adatas.items(), desc="Embeddings", position=0, colour="green"):

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scib_metrics/benchmark/_core.py:174, in Benchmarker.prepare(self, neighbor_computer)
    170 # Compute PCA
    171 if self._pre_integrated_embedding_obsm_key is None:
    172     # This is how scib does it
    173     # https://github.com/theislab/scib/blob/896f689e5fe8c57502cb012af06bed1a9b2b61d2/scib/metrics/pcr.py#L197
--> 174     sc.tl.pca(self._adata, use_highly_variable=False)
    175     self._pre_integrated_embedding_obsm_key = "X_pca"
    177 for emb_key in self._embedding_obsm_keys:

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/preprocessing/_pca.py:200, in pca(data, n_comps, zero_center, svd_solver, random_state, return_info, use_highly_variable, dtype, copy, chunked, chunk_size)
    194 if svd_solver not in {'lobpcg', 'arpack'}:
    195     raise ValueError(
    196         'svd_solver: {svd_solver} can not be used with sparse input.\n'
    197         'Use "arpack" (the default) or "lobpcg" instead.'
    198     )
--> 200 output = _pca_with_sparse(
    201     X, n_comps, solver=svd_solver, random_state=random_state
    202 )
    203 # this is just a wrapper for the results
    204 X_pca = output['X_pca']

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/preprocessing/_pca.py:303, in _pca_with_sparse(X, npcs, solver, mu, random_state)
    292     return XHmat(x) - mhmat(ones(x))
    294 XL = LinearOperator(
    295     matvec=matvec,
    296     dtype=X.dtype,
   (...)
    300     rmatmat=rmatmat,
    301 )
--> 303 u, s, v = svds(XL, solver=solver, k=npcs, v0=random_init)
    304 u, v = svd_flip(u, v)
    305 idx = np.argsort(-s)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_eigen/_svds.py:525, in svds(A, k, ncv, tol, which, v0, maxiter, return_singular_vectors, solver, random_state, options)
    523 if v0 is None:
    524     v0 = random_state.standard_normal(size=(min(A.shape),))
--> 525 _, eigvec = eigsh(XH_X, k=k, tol=tol ** 2, maxiter=maxiter,
    526                   ncv=ncv, which=which, v0=v0)
    527 # arpack do not guarantee exactly orthonormal eigenvectors
    528 # for clustered eigenvalues, especially in complex arithmetic
    529 eigvec, _ = np.linalg.qr(eigvec)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_eigen/arpack/arpack.py:1697, in eigsh(A, k, M, sigma, which, v0, ncv, maxiter, tol, return_eigenvectors, Minv, OPinv, mode)
   1695 with _ARPACK_LOCK:
   1696     while not params.converged:
-> 1697         params.iterate()
   1699     return params.extract(return_eigenvectors)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_eigen/arpack/arpack.py:549, in _SymmetricArpackParams.iterate(self)
    546 elif self.ido == 1:
    547     # compute y = Op*x
    548     if self.mode == 1:
--> 549         self.workd[yslice] = self.OP(self.workd[xslice])
    550     elif self.mode == 2:
    551         self.workd[xslice] = self.OPb(self.workd[xslice])

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_interface.py:234, in LinearOperator.matvec(self, x)
    231 if x.shape != (N,) and x.shape != (N,1):
    232     raise ValueError('dimension mismatch')
--> 234 y = self._matvec(x)
    236 if isinstance(x, np.matrix):
    237     y = asmatrix(y)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_interface.py:591, in _CustomLinearOperator._matvec(self, x)
    590 def _matvec(self, x):
--> 591     return self.__matvec_impl(x)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_eigen/_svds.py:469, in svds.<locals>.matvec_XH_X(x)
    468 def matvec_XH_X(x):
--> 469     return XH_dot(X_dot(x))

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_interface.py:234, in LinearOperator.matvec(self, x)
    231 if x.shape != (N,) and x.shape != (N,1):
    232     raise ValueError('dimension mismatch')
--> 234 y = self._matvec(x)
    236 if isinstance(x, np.matrix):
    237     y = asmatrix(y)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_interface.py:591, in _CustomLinearOperator._matvec(self, x)
    590 def _matvec(self, x):
--> 591     return self.__matvec_impl(x)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/preprocessing/_pca.py:283, in _pca_with_sparse.<locals>.matvec(x)
    282 def matvec(x):
--> 283     return Xdot(x) - mdot(x)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/_base.py:411, in _spbase.dot(self, other)
    409     return self * other
    410 else:
--> 411     return self @ other

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/_base.py:624, in _spbase.__matmul__(self, other)
    621 if isscalarlike(other):
    622     raise ValueError("Scalar operands are not allowed, "
    623                      "use '*' instead")
--> 624 return self._mul_dispatch(other)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/_base.py:522, in _spbase._mul_dispatch(self, other)
    519 if other.__class__ is np.ndarray:
    520     # Fast path for the most common case
    521     if other.shape == (N,):
--> 522         return self._mul_vector(other)
    523     elif other.shape == (N, 1):
    524         return self._mul_vector(other.ravel()).reshape(M, 1)

File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/_compressed.py:488, in _cs_matrix._mul_vector(self, other)
    486 # csr_matvec or csc_matvec
    487 fn = getattr(_sparsetools, self.format + '_matvec')
--> 488 fn(M, N, self.indptr, self.indices, self.data, other, result)
    490 return result

KeyboardInterrupt: 
In [180]:
Copied!
bm.plot_results_table(min_max_scale=False)
bm.plot_results_table(min_max_scale=False)
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1641: DeprecationWarning: np.find_common_type is deprecated.  Please use `np.result_type` or `np.promote_types`.
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/numpy/lib/nanfunctions.py:1215: RuntimeWarning: Mean of empty slice
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
No description has been provided for this image
Out[180]:
<plottable.table.Table at 0x7f4ee756d030>
In [ ]:
Copied!

In [ ]:
Copied!

In [ ]:
Copied!
# Inspect the first cell's expression vector. AnnData's .X is a matrix
# (here a sparse csr_matrix), so index rows directly and densify with
# .toarray(); the pandas-style .iloc / .to_array used before do not exist
# on it and would raise AttributeError.
adata.X[0].toarray()
adata.X[0].toarray()
In [ ]:
Copied!
model
model

Results from running it on the most popular methods¶

In [ ]:
Copied!
## load a random dataset
## load a random dataset
In [ ]:
Copied!
## do scGPT finetuning task (reusing the helper functions I have) and the notebook that they provide
## do scGPT finetuning task (reusing the helper functions I have) and the notebook that they provide
In [ ]:
Copied!
## create a task/function for scPrint
## create a task/function for scPrint
In [ ]:
Copied!
## try to do an embedding from the regulon / single cell type matrix outputted by scenic
## try to do an embedding from the regulon / single cell type matrix outputted by scenic
In [ ]:
Copied!
## push it to a function on BenGRN
## push it to a function on BenGRN
In [ ]:
Copied!

Previous Next

Built with MkDocs using a theme provided by Read the Docs.
« Previous Next »